********************************************************************************
** 	TITLE:		esA2019_cis_contact_networks                                  ** 	
**  AUTHOR:	    Philippe Mongrain                                             **
**	DATA:       3242                                                          **
**	DATE:		October 2022 					                              **	
**	VERSION:	Stata 16					                                  **	
********************************************************************************

* Session setup: pin the interpreter version, start a fresh log, load the data

* Run every command below under Stata 16.0 rules
version 16.0

* File names used by this section
local logname "esA2019_cis_contact_networks"
local rawdata "3242.dta"

* Close a log left open by an earlier run (capture ignores the error raised
* when no log is open), then start a new one, overwriting any previous log
capture log close
log using "`logname'", replace

* Load the survey file, discarding whatever is currently in memory
use "`rawdata'", clear


***************************
** INDEPENDENT VARIABLES **
***************************

* Time
*
* Builds the interview date and the election date as Stata daily dates and
* stores in "time" the number of days from the interview to the election
* (positive when the interview took place before election day).

* A year recorded as 19 is expanded to 2019
replace e103 = 2019 if e103 == 19

* mdy() takes month, day, year in that order, so e102 is used as the month,
* e101 as the day and e103 as the year
gen surveydate = mdy(e102, e101, e103)
format %tdMon_DD,_CCYY surveydate

* Election day written as a YYYYMMDD number (28 April 2019).
* Stored as long: the default float type holds integers exactly only up to
* 16,777,216, so an 8-digit date can be silently rounded before it is
* converted to a string. The present value happens to be representable as a
* float, but the explicit type removes the dependence on that coincidence.
gen long edate = 20190428

* Convert the YYYYMMDD number to a Stata daily date
gen electiondate = date(string(edate, "%8.0f"), "YMD")
format %tdMon_DD,_CCYY electiondate

* Days between the interview and election day
gen time = electiondate - surveydate

* Gender
* Copy of p22 with code 2 set to 0; every other value (including missing)
* is carried over unchanged.

gen male = cond(p22 == 2, 0, p22)

* Age
* Copy of p23, set to missing for values below 17. Missing values compare as
* larger than any number, so they pass the condition and stay missing.

gen age = p23 if p23 >= 17

* Education
* Copy of p24a with codes 10 and 99 set to missing.

gen education = p24a if !inlist(p24a, 10, 99)

* Interest in politics
* Code 9 of p1 is set to missing and the remaining values are reversed
* (5 minus the original code).

gen interest = 5 - p1 if p1 != 9

* Party wants to win (national)
*
* party = 1 ("Winner") when p16 equals 2, 0 ("Loser") for any other
* non-missing answer, and missing when p16 is 98, 99 or already missing.
* NOTE(review): code 2 is presumably the party that won the election --
* confirm against the codebook.

gen party = p16

recode party (98 99 = .)

* missing() is used instead of comparing with "." so that extended missing
* values (.a to .z) stay missing rather than being coded 0
replace party = (party == 2) if !missing(party)

label define party 0 "Loser" 1 "Winner"
label values party party

* Political discussion
* Code 9 of p2 is set to missing and the remaining values are reversed
* (4 minus the original code).

gen discussion = 4 - p2 if p2 != 9

* Variant of discussion in which the values 1, 2 and 3 are shifted down to
* 0, 1 and 2; any other value is carried over unchanged.
* NOTE(review): if discussion can equal 0 (p2 == 4), that value stays 0 here
* and shares a category with the recoded 1 -- confirm the range of p2.

gen discussion_3pts = cond(inlist(discussion, 1, 2, 3), discussion - 1, discussion)


*********************************
** NATIONAL-LEVEL EXPECTATIONS **
*********************************

* Forecasts
* Copy of p15 with code 99 set to missing.

gen forecast_whole = p15

recode forecast_whole (99=.)

* Identify correct forecasts
* correct_whole_d = 1 when the forecast equals 2, 0 for any other non-missing
* forecast, and missing otherwise. missing() is used instead of comparing
* with "." so that extended missing values (.a to .z) are not counted as
* incorrect forecasts.

gen correct_whole_d = (forecast_whole == 2) if !missing(forecast_whole)


**********
** SAVE **
**********

* Write the data in memory, including the variables created above, to disk;
* replace overwrites an existing file of the same name
save "esA2019_cis.dta", replace


*******************
** MAIN ANALYSES **
*******************

* Regression analysis and percentage of correct and incorrect forecasts

* Right-hand-side variables of the model; party and male enter as factor
* variables
local covariates discussion i.party interest age i.male education time

* Logistic regression of the correct-forecast indicator (logistic reports
* odds ratios)
logistic correct_whole_d `covariates'

* Coefficient table with significance stars at the .05, .01 and .001 levels
estimates table, star(.05 .01 .001)

* Distribution of the outcome among the observations used in the estimation
tab correct_whole_d if e(sample)


************************
** SUMMARY STATISTICS **
************************

* Variable labels printed in the summary table (esttab is called with the
* label option)
label variable time "Time of interview"
label variable education "Education"
label variable male "Gender (male = 1)"
label variable age "Age"
label variable interest "Interest"
label variable party "Favoured winner"
label variable discussion "Discussion"
label variable correct_whole_d "Election winner (DV)"

* Variables summarised, in the order they appear in the table
local sumvars correct_whole_d discussion party interest age male education time

* Descriptive statistics restricted to the estimation sample of the model
* fitted above; estpost stores them so that esttab can tabulate them
estpost tabstat `sumvars' if e(sample), ///
    statistics(n mean p50 sd min max) columns(statistics)

* Export the table to LaTeX; substitute() swaps the float placement
* specifier [htbp] for [H] in the generated file
esttab using "esA2019_cis_summary_national.tex", ///
    substitute("\begin{table}[htbp]" "\begin{table}[H]") ///
    cells("count(label(N) fmt(%9.0fc)) mean(fmt(%5.2f) label(Mean)) p50(fmt(%5.1f) label(Median)) sd(fmt(%5.1f) label(Std. dev.)) min(fmt(%5.1f) label(Min)) max(fmt(%5.1f) label(Max))") ///
    label width(\textwidth) nomtitle nonumber noobs booktabs ///
    title("Summary statistics -- April 2019 Spanish general election (CIS), national level") ///
    replace

* Drop stored estimation sets and close the log opened at the top of the file
eststo clear

log close